{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "9bcec48b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b8784766",
   "metadata": {},
   "source": [
    "# Read a .csv from a URL with Pandas"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1e116955",
   "metadata": {},
   "source": [
    "Target website: https://www.football-data.co.uk/data.php"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "62ad1716",
   "metadata": {},
   "outputs": [],
   "source": [
    "# reading 1 csv file from the website\n",
    "df_premier21 = pd.read_csv('https://www.football-data.co.uk/mmz4281/2122/E0.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e0b6be7b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Div</th>\n",
       "      <th>Date</th>\n",
       "      <th>Time</th>\n",
       "      <th>HomeTeam</th>\n",
       "      <th>AwayTeam</th>\n",
       "      <th>FTHG</th>\n",
       "      <th>FTAG</th>\n",
       "      <th>FTR</th>\n",
       "      <th>HTHG</th>\n",
       "      <th>HTAG</th>\n",
       "      <th>...</th>\n",
       "      <th>AvgC&lt;2.5</th>\n",
       "      <th>AHCh</th>\n",
       "      <th>B365CAHH</th>\n",
       "      <th>B365CAHA</th>\n",
       "      <th>PCAHH</th>\n",
       "      <th>PCAHA</th>\n",
       "      <th>MaxCAHH</th>\n",
       "      <th>MaxCAHA</th>\n",
       "      <th>AvgCAHH</th>\n",
       "      <th>AvgCAHA</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>E0</td>\n",
       "      <td>13/08/2021</td>\n",
       "      <td>20:00</td>\n",
       "      <td>Brentford</td>\n",
       "      <td>Arsenal</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>H</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.62</td>\n",
       "      <td>0.50</td>\n",
       "      <td>1.75</td>\n",
       "      <td>2.05</td>\n",
       "      <td>1.81</td>\n",
       "      <td>2.13</td>\n",
       "      <td>2.05</td>\n",
       "      <td>2.17</td>\n",
       "      <td>1.80</td>\n",
       "      <td>2.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>E0</td>\n",
       "      <td>14/08/2021</td>\n",
       "      <td>12:30</td>\n",
       "      <td>Man United</td>\n",
       "      <td>Leeds</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>H</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.25</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>2.05</td>\n",
       "      <td>1.75</td>\n",
       "      <td>2.17</td>\n",
       "      <td>1.77</td>\n",
       "      <td>2.19</td>\n",
       "      <td>1.93</td>\n",
       "      <td>2.10</td>\n",
       "      <td>1.79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>E0</td>\n",
       "      <td>14/08/2021</td>\n",
       "      <td>15:00</td>\n",
       "      <td>Burnley</td>\n",
       "      <td>Brighton</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.62</td>\n",
       "      <td>0.25</td>\n",
       "      <td>1.79</td>\n",
       "      <td>2.15</td>\n",
       "      <td>1.81</td>\n",
       "      <td>2.14</td>\n",
       "      <td>1.82</td>\n",
       "      <td>2.19</td>\n",
       "      <td>1.79</td>\n",
       "      <td>2.12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>E0</td>\n",
       "      <td>14/08/2021</td>\n",
       "      <td>15:00</td>\n",
       "      <td>Chelsea</td>\n",
       "      <td>Crystal Palace</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>H</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.94</td>\n",
       "      <td>-1.50</td>\n",
       "      <td>2.05</td>\n",
       "      <td>1.75</td>\n",
       "      <td>2.12</td>\n",
       "      <td>1.81</td>\n",
       "      <td>2.16</td>\n",
       "      <td>1.93</td>\n",
       "      <td>2.06</td>\n",
       "      <td>1.82</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>E0</td>\n",
       "      <td>14/08/2021</td>\n",
       "      <td>15:00</td>\n",
       "      <td>Everton</td>\n",
       "      <td>Southampton</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>H</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1.67</td>\n",
       "      <td>-0.50</td>\n",
       "      <td>2.05</td>\n",
       "      <td>1.88</td>\n",
       "      <td>2.05</td>\n",
       "      <td>1.88</td>\n",
       "      <td>2.08</td>\n",
       "      <td>1.90</td>\n",
       "      <td>2.03</td>\n",
       "      <td>1.86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>210</th>\n",
       "      <td>E0</td>\n",
       "      <td>23/01/2022</td>\n",
       "      <td>14:00</td>\n",
       "      <td>Arsenal</td>\n",
       "      <td>Burnley</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>D</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.32</td>\n",
       "      <td>-1.50</td>\n",
       "      <td>1.92</td>\n",
       "      <td>2.01</td>\n",
       "      <td>1.93</td>\n",
       "      <td>2.00</td>\n",
       "      <td>1.93</td>\n",
       "      <td>2.11</td>\n",
       "      <td>1.90</td>\n",
       "      <td>1.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>211</th>\n",
       "      <td>E0</td>\n",
       "      <td>23/01/2022</td>\n",
       "      <td>14:00</td>\n",
       "      <td>Crystal Palace</td>\n",
       "      <td>Liverpool</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>A</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>2.11</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.96</td>\n",
       "      <td>1.97</td>\n",
       "      <td>1.94</td>\n",
       "      <td>1.97</td>\n",
       "      <td>2.07</td>\n",
       "      <td>2.02</td>\n",
       "      <td>1.92</td>\n",
       "      <td>1.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>E0</td>\n",
       "      <td>23/01/2022</td>\n",
       "      <td>14:00</td>\n",
       "      <td>Leicester</td>\n",
       "      <td>Brighton</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>D</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.10</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.92</td>\n",
       "      <td>2.01</td>\n",
       "      <td>1.93</td>\n",
       "      <td>1.99</td>\n",
       "      <td>1.94</td>\n",
       "      <td>2.13</td>\n",
       "      <td>1.87</td>\n",
       "      <td>2.01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>213</th>\n",
       "      <td>E0</td>\n",
       "      <td>23/01/2022</td>\n",
       "      <td>16:30</td>\n",
       "      <td>Chelsea</td>\n",
       "      <td>Tottenham</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>H</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.84</td>\n",
       "      <td>-0.75</td>\n",
       "      <td>1.99</td>\n",
       "      <td>1.94</td>\n",
       "      <td>1.97</td>\n",
       "      <td>1.95</td>\n",
       "      <td>2.03</td>\n",
       "      <td>2.07</td>\n",
       "      <td>1.96</td>\n",
       "      <td>1.92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>214</th>\n",
       "      <td>E0</td>\n",
       "      <td>05/02/2022</td>\n",
       "      <td>18:00</td>\n",
       "      <td>Burnley</td>\n",
       "      <td>Watford</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>D</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.73</td>\n",
       "      <td>-0.25</td>\n",
       "      <td>1.90</td>\n",
       "      <td>2.00</td>\n",
       "      <td>1.89</td>\n",
       "      <td>2.03</td>\n",
       "      <td>1.93</td>\n",
       "      <td>2.07</td>\n",
       "      <td>1.87</td>\n",
       "      <td>2.01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>215 rows × 106 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Div        Date   Time        HomeTeam        AwayTeam  FTHG  FTAG FTR  \\\n",
       "0    E0  13/08/2021  20:00       Brentford         Arsenal     2     0   H   \n",
       "1    E0  14/08/2021  12:30      Man United           Leeds     5     1   H   \n",
       "2    E0  14/08/2021  15:00         Burnley        Brighton     1     2   A   \n",
       "3    E0  14/08/2021  15:00         Chelsea  Crystal Palace     3     0   H   \n",
       "4    E0  14/08/2021  15:00         Everton     Southampton     3     1   H   \n",
       "..   ..         ...    ...             ...             ...   ...   ...  ..   \n",
       "210  E0  23/01/2022  14:00         Arsenal         Burnley     0     0   D   \n",
       "211  E0  23/01/2022  14:00  Crystal Palace       Liverpool     1     3   A   \n",
       "212  E0  23/01/2022  14:00       Leicester        Brighton     1     1   D   \n",
       "213  E0  23/01/2022  16:30         Chelsea       Tottenham     2     0   H   \n",
       "214  E0  05/02/2022  18:00         Burnley         Watford     0     0   D   \n",
       "\n",
       "     HTHG  HTAG  ... AvgC<2.5  AHCh  B365CAHH  B365CAHA  PCAHH  PCAHA  \\\n",
       "0       1     0  ...     1.62  0.50      1.75      2.05   1.81   2.13   \n",
       "1       1     0  ...     2.25 -1.00      2.05      1.75   2.17   1.77   \n",
       "2       1     0  ...     1.62  0.25      1.79      2.15   1.81   2.14   \n",
       "3       2     0  ...     1.94 -1.50      2.05      1.75   2.12   1.81   \n",
       "4       0     1  ...     1.67 -0.50      2.05      1.88   2.05   1.88   \n",
       "..    ...   ...  ...      ...   ...       ...       ...    ...    ...   \n",
       "210     0     0  ...     2.32 -1.50      1.92      2.01   1.93   2.00   \n",
       "211     0     2  ...     2.11  1.00      1.96      1.97   1.94   1.97   \n",
       "212     0     0  ...     2.10  0.00      1.92      2.01   1.93   1.99   \n",
       "213     0     0  ...     1.84 -0.75      1.99      1.94   1.97   1.95   \n",
       "214     0     0  ...     1.73 -0.25      1.90      2.00   1.89   2.03   \n",
       "\n",
       "     MaxCAHH  MaxCAHA  AvgCAHH  AvgCAHA  \n",
       "0       2.05     2.17     1.80     2.09  \n",
       "1       2.19     1.93     2.10     1.79  \n",
       "2       1.82     2.19     1.79     2.12  \n",
       "3       2.16     1.93     2.06     1.82  \n",
       "4       2.08     1.90     2.03     1.86  \n",
       "..       ...      ...      ...      ...  \n",
       "210     1.93     2.11     1.90     1.98  \n",
       "211     2.07     2.02     1.92     1.96  \n",
       "212     1.94     2.13     1.87     2.01  \n",
       "213     2.03     2.07     1.96     1.92  \n",
       "214     1.93     2.07     1.87     2.01  \n",
       "\n",
       "[215 rows x 106 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# showing dataframe\n",
    "df_premier21"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "f9bcd9f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# rename columns\n",
    "df_premier21 = df_premier21.rename(columns={'Date':'date',\n",
    "                                            'HomeTeam':'home_team',\n",
    "                                            'AwayTeam':'away_team',\n",
    "                                            'FTHG': 'home_goals',\n",
    "                                            'FTAG': 'away_goals'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a650ca82",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Div</th>\n",
       "      <th>date</th>\n",
       "      <th>Time</th>\n",
       "      <th>home_team</th>\n",
       "      <th>away_team</th>\n",
       "      <th>home_goals</th>\n",
       "      <th>away_goals</th>\n",
       "      <th>FTR</th>\n",
       "      <th>HTHG</th>\n",
       "      <th>HTAG</th>\n",
       "      <th>...</th>\n",
       "      <th>AvgC&lt;2.5</th>\n",
       "      <th>AHCh</th>\n",
       "      <th>B365CAHH</th>\n",
       "      <th>B365CAHA</th>\n",
       "      <th>PCAHH</th>\n",
       "      <th>PCAHA</th>\n",
       "      <th>MaxCAHH</th>\n",
       "      <th>MaxCAHA</th>\n",
       "      <th>AvgCAHH</th>\n",
       "      <th>AvgCAHA</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>E0</td>\n",
       "      <td>13/08/2021</td>\n",
       "      <td>20:00</td>\n",
       "      <td>Brentford</td>\n",
       "      <td>Arsenal</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>H</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.62</td>\n",
       "      <td>0.50</td>\n",
       "      <td>1.75</td>\n",
       "      <td>2.05</td>\n",
       "      <td>1.81</td>\n",
       "      <td>2.13</td>\n",
       "      <td>2.05</td>\n",
       "      <td>2.17</td>\n",
       "      <td>1.80</td>\n",
       "      <td>2.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>E0</td>\n",
       "      <td>14/08/2021</td>\n",
       "      <td>12:30</td>\n",
       "      <td>Man United</td>\n",
       "      <td>Leeds</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>H</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.25</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>2.05</td>\n",
       "      <td>1.75</td>\n",
       "      <td>2.17</td>\n",
       "      <td>1.77</td>\n",
       "      <td>2.19</td>\n",
       "      <td>1.93</td>\n",
       "      <td>2.10</td>\n",
       "      <td>1.79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>E0</td>\n",
       "      <td>14/08/2021</td>\n",
       "      <td>15:00</td>\n",
       "      <td>Burnley</td>\n",
       "      <td>Brighton</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.62</td>\n",
       "      <td>0.25</td>\n",
       "      <td>1.79</td>\n",
       "      <td>2.15</td>\n",
       "      <td>1.81</td>\n",
       "      <td>2.14</td>\n",
       "      <td>1.82</td>\n",
       "      <td>2.19</td>\n",
       "      <td>1.79</td>\n",
       "      <td>2.12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>E0</td>\n",
       "      <td>14/08/2021</td>\n",
       "      <td>15:00</td>\n",
       "      <td>Chelsea</td>\n",
       "      <td>Crystal Palace</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>H</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.94</td>\n",
       "      <td>-1.50</td>\n",
       "      <td>2.05</td>\n",
       "      <td>1.75</td>\n",
       "      <td>2.12</td>\n",
       "      <td>1.81</td>\n",
       "      <td>2.16</td>\n",
       "      <td>1.93</td>\n",
       "      <td>2.06</td>\n",
       "      <td>1.82</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>E0</td>\n",
       "      <td>14/08/2021</td>\n",
       "      <td>15:00</td>\n",
       "      <td>Everton</td>\n",
       "      <td>Southampton</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>H</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1.67</td>\n",
       "      <td>-0.50</td>\n",
       "      <td>2.05</td>\n",
       "      <td>1.88</td>\n",
       "      <td>2.05</td>\n",
       "      <td>1.88</td>\n",
       "      <td>2.08</td>\n",
       "      <td>1.90</td>\n",
       "      <td>2.03</td>\n",
       "      <td>1.86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>210</th>\n",
       "      <td>E0</td>\n",
       "      <td>23/01/2022</td>\n",
       "      <td>14:00</td>\n",
       "      <td>Arsenal</td>\n",
       "      <td>Burnley</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>D</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.32</td>\n",
       "      <td>-1.50</td>\n",
       "      <td>1.92</td>\n",
       "      <td>2.01</td>\n",
       "      <td>1.93</td>\n",
       "      <td>2.00</td>\n",
       "      <td>1.93</td>\n",
       "      <td>2.11</td>\n",
       "      <td>1.90</td>\n",
       "      <td>1.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>211</th>\n",
       "      <td>E0</td>\n",
       "      <td>23/01/2022</td>\n",
       "      <td>14:00</td>\n",
       "      <td>Crystal Palace</td>\n",
       "      <td>Liverpool</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>A</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>2.11</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.96</td>\n",
       "      <td>1.97</td>\n",
       "      <td>1.94</td>\n",
       "      <td>1.97</td>\n",
       "      <td>2.07</td>\n",
       "      <td>2.02</td>\n",
       "      <td>1.92</td>\n",
       "      <td>1.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>E0</td>\n",
       "      <td>23/01/2022</td>\n",
       "      <td>14:00</td>\n",
       "      <td>Leicester</td>\n",
       "      <td>Brighton</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>D</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.10</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.92</td>\n",
       "      <td>2.01</td>\n",
       "      <td>1.93</td>\n",
       "      <td>1.99</td>\n",
       "      <td>1.94</td>\n",
       "      <td>2.13</td>\n",
       "      <td>1.87</td>\n",
       "      <td>2.01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>213</th>\n",
       "      <td>E0</td>\n",
       "      <td>23/01/2022</td>\n",
       "      <td>16:30</td>\n",
       "      <td>Chelsea</td>\n",
       "      <td>Tottenham</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>H</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.84</td>\n",
       "      <td>-0.75</td>\n",
       "      <td>1.99</td>\n",
       "      <td>1.94</td>\n",
       "      <td>1.97</td>\n",
       "      <td>1.95</td>\n",
       "      <td>2.03</td>\n",
       "      <td>2.07</td>\n",
       "      <td>1.96</td>\n",
       "      <td>1.92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>214</th>\n",
       "      <td>E0</td>\n",
       "      <td>05/02/2022</td>\n",
       "      <td>18:00</td>\n",
       "      <td>Burnley</td>\n",
       "      <td>Watford</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>D</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.73</td>\n",
       "      <td>-0.25</td>\n",
       "      <td>1.90</td>\n",
       "      <td>2.00</td>\n",
       "      <td>1.89</td>\n",
       "      <td>2.03</td>\n",
       "      <td>1.93</td>\n",
       "      <td>2.07</td>\n",
       "      <td>1.87</td>\n",
       "      <td>2.01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>215 rows × 106 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Div        date   Time       home_team       away_team  home_goals  \\\n",
       "0    E0  13/08/2021  20:00       Brentford         Arsenal           2   \n",
       "1    E0  14/08/2021  12:30      Man United           Leeds           5   \n",
       "2    E0  14/08/2021  15:00         Burnley        Brighton           1   \n",
       "3    E0  14/08/2021  15:00         Chelsea  Crystal Palace           3   \n",
       "4    E0  14/08/2021  15:00         Everton     Southampton           3   \n",
       "..   ..         ...    ...             ...             ...         ...   \n",
       "210  E0  23/01/2022  14:00         Arsenal         Burnley           0   \n",
       "211  E0  23/01/2022  14:00  Crystal Palace       Liverpool           1   \n",
       "212  E0  23/01/2022  14:00       Leicester        Brighton           1   \n",
       "213  E0  23/01/2022  16:30         Chelsea       Tottenham           2   \n",
       "214  E0  05/02/2022  18:00         Burnley         Watford           0   \n",
       "\n",
       "     away_goals FTR  HTHG  HTAG  ... AvgC<2.5  AHCh  B365CAHH  B365CAHA  \\\n",
       "0             0   H     1     0  ...     1.62  0.50      1.75      2.05   \n",
       "1             1   H     1     0  ...     2.25 -1.00      2.05      1.75   \n",
       "2             2   A     1     0  ...     1.62  0.25      1.79      2.15   \n",
       "3             0   H     2     0  ...     1.94 -1.50      2.05      1.75   \n",
       "4             1   H     0     1  ...     1.67 -0.50      2.05      1.88   \n",
       "..          ...  ..   ...   ...  ...      ...   ...       ...       ...   \n",
       "210           0   D     0     0  ...     2.32 -1.50      1.92      2.01   \n",
       "211           3   A     0     2  ...     2.11  1.00      1.96      1.97   \n",
       "212           1   D     0     0  ...     2.10  0.00      1.92      2.01   \n",
       "213           0   H     0     0  ...     1.84 -0.75      1.99      1.94   \n",
       "214           0   D     0     0  ...     1.73 -0.25      1.90      2.00   \n",
       "\n",
       "     PCAHH  PCAHA  MaxCAHH  MaxCAHA  AvgCAHH  AvgCAHA  \n",
       "0     1.81   2.13     2.05     2.17     1.80     2.09  \n",
       "1     2.17   1.77     2.19     1.93     2.10     1.79  \n",
       "2     1.81   2.14     1.82     2.19     1.79     2.12  \n",
       "3     2.12   1.81     2.16     1.93     2.06     1.82  \n",
       "4     2.05   1.88     2.08     1.90     2.03     1.86  \n",
       "..     ...    ...      ...      ...      ...      ...  \n",
       "210   1.93   2.00     1.93     2.11     1.90     1.98  \n",
       "211   1.94   1.97     2.07     2.02     1.92     1.96  \n",
       "212   1.93   1.99     1.94     2.13     1.87     2.01  \n",
       "213   1.97   1.95     2.03     2.07     1.96     1.92  \n",
       "214   1.89   2.03     1.93     2.07     1.87     2.01  \n",
       "\n",
       "[215 rows x 106 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# show dataframe\n",
    "df_premier21"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "sacred-march",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "sacred-directive",
   "metadata": {},
   "source": [
    "# Read HTML"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dental-cradle",
   "metadata": {},
   "source": [
    "Target Website: https://en.wikipedia.org/wiki/List_of_The_Simpsons_episodes_(seasons_1%E2%80%9320)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "sacred-louisville",
   "metadata": {},
   "outputs": [],
   "source": [
    "simpsons = pd.read_html('https://en.wikipedia.org/wiki/List_of_The_Simpsons_episodes_(seasons_1%E2%80%9320)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "pending-plaza",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>No.overall</th>\n",
       "      <th>No. inseason</th>\n",
       "      <th>Title</th>\n",
       "      <th>Directed by</th>\n",
       "      <th>Written by</th>\n",
       "      <th>Original air date</th>\n",
       "      <th>Prod.code</th>\n",
       "      <th>U.S. viewers(millions)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>\"Simpsons Roasting on an Open Fire\"</td>\n",
       "      <td>David Silverman</td>\n",
       "      <td>Mimi Pond</td>\n",
       "      <td>December 17, 1989</td>\n",
       "      <td>7G08</td>\n",
       "      <td>26.7[46]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>\"Bart the Genius\"</td>\n",
       "      <td>David Silverman</td>\n",
       "      <td>Jon Vitti</td>\n",
       "      <td>January 14, 1990</td>\n",
       "      <td>7G02</td>\n",
       "      <td>24.5[46]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>\"Homer's Odyssey\"</td>\n",
       "      <td>Wesley Archer</td>\n",
       "      <td>Jay Kogen &amp; Wallace Wolodarsky</td>\n",
       "      <td>January 21, 1990</td>\n",
       "      <td>7G03</td>\n",
       "      <td>27.5[47]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>\"There's No Disgrace Like Home\"</td>\n",
       "      <td>Gregg Vanzo &amp; Kent Butterworth</td>\n",
       "      <td>Al Jean &amp; Mike Reiss</td>\n",
       "      <td>January 28, 1990</td>\n",
       "      <td>7G04</td>\n",
       "      <td>20.2[48]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>\"Bart the General\"</td>\n",
       "      <td>David Silverman</td>\n",
       "      <td>John Swartzwelder</td>\n",
       "      <td>February 4, 1990</td>\n",
       "      <td>7G05</td>\n",
       "      <td>27.1[49]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   No.overall  No. inseason                                Title  \\\n",
       "0           1             1  \"Simpsons Roasting on an Open Fire\"   \n",
       "1           2             2                    \"Bart the Genius\"   \n",
       "2           3             3                    \"Homer's Odyssey\"   \n",
       "3           4             4      \"There's No Disgrace Like Home\"   \n",
       "4           5             5                   \"Bart the General\"   \n",
       "\n",
       "                      Directed by                      Written by  \\\n",
       "0                 David Silverman                       Mimi Pond   \n",
       "1                 David Silverman                       Jon Vitti   \n",
       "2                   Wesley Archer  Jay Kogen & Wallace Wolodarsky   \n",
       "3  Gregg Vanzo & Kent Butterworth            Al Jean & Mike Reiss   \n",
       "4                 David Silverman               John Swartzwelder   \n",
       "\n",
       "   Original air date Prod.code U.S. viewers(millions)  \n",
       "0  December 17, 1989      7G08               26.7[46]  \n",
       "1   January 14, 1990      7G02               24.5[46]  \n",
       "2   January 21, 1990      7G03               27.5[47]  \n",
       "3   January 28, 1990      7G04               20.2[48]  \n",
       "4   February 4, 1990      7G05               27.1[49]  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "simpsons[1].head()"
   ]
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "nbTranslate": {
   "displayLangs": [
    "es",
    "en"
   ],
   "hotkey": "alt-t",
   "langInMainMenu": true,
   "sourceLang": "en",
   "targetLang": "es",
   "useGoogleTranslate": true
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
